Rows in the raw count matrix is renamed using geneID. Chemicals and cell lines are separated for the downstream analysis.
suppressMessages(library(dplyr))
suppressMessages(library(tximport))
suppressMessages(library(S4Vectors))
suppressMessages(library(readr))
suppressMessages(library(EnsDb.Hsapiens.v86))
suppressMessages(library(ensembldb))
suppressMessages(library(DESeq2))
suppressMessages(library(ggplot2))
suppressMessages(library(pheatmap))
suppressMessages(library(umap))
suppressMessages(library(biomaRt))
suppressMessages(library(org.Hs.eg.db))
suppressMessages(library(DOSE))
suppressMessages(library(pathview))
suppressMessages(library(clusterProfiler))
suppressMessages(library(pheatmap))
suppressMessages(library(VennDiagram))
suppressMessages(library(RColorBrewer))
suppressMessages(library(msigdbr))
suppressMessages(library(edgeR))
suppressMessages(library(stringr))
suppressMessages(library(future))
suppressMessages(library(patchwork))
suppressMessages(library(VennDiagram))
suppressMessages(library(GSEABase))
suppressMessages(library(GSVA))
cells <- read.delim(file = "/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/subreadCounts.txt")
info <- read.csv(file = "/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/info.csv", header = T, sep = ";")
cells[is.na(cells)] <- 0
re_name <- function(cells) {
gene <- cells[,1]
rownames(cells) <- gene
cells <- cells[,-1]
return(cells)
}
cells <- re_name(cells)
re_order <- function(info, cells) {
genomic_idx <- match(info$sample, colnames(cells))
genomic_idx
cells <- cells[ ,genomic_idx]
all(info$sample == colnames(cells))
return(cells)
}
info <- info[-c(46,
48:49,31:32),]
cells <- re_order(info, cells)
chemical <- factor(c("DES", "KTZ"))
cell_line <- factor(c("COV434", "KGN", "Primary"))
#for (i in chemical) {
# info_chemical <- info %>% dplyr::filter(chemical == i | chemical == "DMSO")
# write.csv(info_chemical, file = paste("/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/processed/info_",
# i,".csv", sep = ""))
# cells_ordered <- re_order(info_chemical, cells)
# write.csv(cells_ordered, file = paste("/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/processed/cells_",
# i,".csv", sep = ""))
# }
dataset <- vector(mode = "list", length = 0)
for (i in chemical) {
print(i)
files <- paste0("/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/processed/info_",
i,".csv")
info2 <- read.csv(file = files, sep = ",")
info2$groups <- as.factor(info2$groups)
info2$groups <- relevel(info2$groups, ref="KGN_DMSO")
files_cells <- paste0("/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/processed/cells_",i,".csv")
cells2 <- read.delim(file = files_cells, sep = ",")
cells2 <- re_name(cells2)
if (all(info2$sample == colnames(cells2))) {
dataset[[paste0("dds_", i)]] <- DESeqDataSetFromMatrix(countData=cells2, colData = info2, design = ~ groups)
} else {
print("Rownames not equal to colnames")
}
}
## [1] "DES"
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## [1] "KTZ"
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
theme <- theme(text=element_text(size=9),
axis.text.x=element_text(color="black", angle = 90),
axis.text.y=element_text(color="black"),
axis.ticks=element_blank(),
strip.background = element_rect(colour=NA, fill=NA),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line.x = element_line(size=0.4),
axis.line.y = element_line(size=0.4),
panel.background = element_blank(),
legend.position = "bottom")
qc_plot <- list()
for (i in chemical) {
files <- paste0("/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/processed/info_",
i,".csv")
info <- read.csv(file = files, sep = ",")
info$group <- relevel(factor(info$group), ref = "DMSO")
qc_plot[[i]] <- info %>%
mutate(read = colSums(counts(dataset[[paste0("dds_", i)]]))) %>%
ggplot(aes(x=sample, y=read, fill = group)) + geom_bar(stat = "identity") + theme +
ggtitle("Samples Sequenced Read") + xlab("") + scale_fill_manual(values = c("#8dd3c7","#fb8072", "#bebada"))
}
wrap_plots(ncol = 2, nrow = 1, plotlist = qc_plot) + plot_annotation(tag_levels = "A")
dataset <- lapply(dataset, function(x) {
keep <- rowSums(counts(x)) >= 10
x <- x[keep,]
})
# check how many variance does each PC explain
# summary(pca)$importance[2,] * 100
PCA_cpm <- list()
for (i in chemical) {
files <- paste0("/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/processed/info_",
i,".csv")
info <- read.csv(file = files, sep = ",")
info$group <- relevel(factor(info$group), ref = "DMSO")
r <- edgeR::cpm(counts(dataset[[paste0("dds_", i)]]), normalized.lib.sizes = T, log = T)
pca <- prcomp(t(r))
df <- cbind(info, pca$x)
contri <- round(summary(pca)$importance[2,] * 100, 2)
PCA_cpm[[i]] <- ggplot(df) + geom_point(aes(x=PC1, y = PC2, color = group, shape = cell_line), size = 3) + theme +
ggtitle(paste0("PCA (cpm) - ", i)) + scale_color_manual(values = c("#8dd3c7","#fb8072", "#bebada")) +
xlab(sprintf("PC1 (%s%%)", contri[1])) + ylab(sprintf("PC2 (%s%%)", contri[2]))
}
wrap_plots(ncol = 2, nrow = 1, plotlist = PCA_cpm) + plot_annotation(tag_levels = "A") +
plot_layout(guides = "collect")
PCA_vst <- list()
for (i in chemical) {
r <- vst(dataset[[paste0("dds_", i)]], blind = T, fitType='mean')
pca <- plotPCA(r, intgroup = c("group", "cell_line"), returnData = T)
percentVar <- round(100 * attr(pca, "percentVar"))
PCA_vst[[i]] <- ggplot(pca, aes(PC1, PC2, color = group.1, shape = cell_line)) +
ggtitle(paste0("PCA (vst) - ", i)) + theme +
geom_point(size = 3) + xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
scale_color_manual(values = c("#8dd3c7","#fb8072", "#bebada"))
}
wrap_plots(ncol = 2, nrow = 1, plotlist = PCA_vst) + plot_annotation(tag_levels = "A")
umap_vst <- list()
for (i in chemical) {
#r <- vst(dataset[[paste0("dds_", i, "_", j)]], blind = T, fitType='mean')
r <- edgeR::cpm(counts(dataset[[paste0("dds_", i)]]), normalized.lib.sizes = T, log = T)
files <- paste0("/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/processed/info_",
i,".csv")
info <- read.csv(file = files, sep = ",")
info$group <- relevel(factor(info$group), ref = "DMSO")
set.seed(1)
#normalized_counts <- assay(r) %>% t()
normalized_counts <- t(r)
umap_results <- umap::umap(normalized_counts, n_neighbors = 3)
umap_plot_df <- data.frame(umap_results$layout) %>%
tibble::rownames_to_column("sample") %>%
dplyr::inner_join(info, by = "sample")
umap_vst[[i]] <- ggplot(umap_plot_df, aes(x = X1, y = X2, color = group, shape = cell_line)) + geom_point(size = 3) +
ggtitle(paste0("UMAP (vst) - ", i)) + theme + scale_color_manual(values = c("#8dd3c7","#fb8072", "#bebada"))
}
wrap_plots(ncol = 2, nrow = 1, plotlist = umap_vst) + plot_annotation(tag_levels = "A")
heatmap_vst <- list()
for (i in chemical) {
r <- vst(dataset[[paste0("dds_", i)]], blind = T, fitType='mean')
r_cor <- cor(assay(r), method = "pearson")
df <- as.data.frame(colData(dataset[[paste0("dds_", i)]])[,c("cell_line","group")])
heatmap_vst[[i]] <- pheatmap(r_cor, scale = "column", show_rownames = F, annotation = df, fontsize = 15,
clustering_distance_cols = "euclidean", main = paste0("Heatmap (vst) - ", i))
}
pdf("/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/figures/heatmap_vst_remove_outlier.pdf")
for (i in 1:2) {
print(heatmap_vst[[i]])
grid::grid.newpage()
}
dev.off()
## quartz_off_screen
## 2
for(i in 1 : length(dataset)){
print(names(dataset)[i])
dataset[[i]] <- DESeq(dataset[[i]])
}
## [1] "dds_DES"
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
## [1] "dds_KTZ"
## estimating size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
comparisonALL <- list()
for(i in chemical){
files <- paste0("/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/processed/info_", i,".csv")
info <- read.csv(file = files, sep = ",")
info$group <- as.factor(info$group)
info$groups <- as.factor(info$groups)
gro <- factor(levels(info$group)[levels(info$group) != "DMSO"])
for (j in cell_line){
for(k in gro){
p <- paste0(j,"_",k)
if (p %in% levels(info$groups)){
print(p)
comparisonALL[[p]] <- results(dataset[[paste0("dds_",i)]],
contrast = c("groups", p, paste0(j,"_DMSO")),
independentFiltering = T, pAdjustMethod = "fdr") %>%
as.data.frame() %>%
mutate(geneid = rownames(.))}
else { print(paste0(p," No result"))}
}
}
}
## [1] "COV434_DES_10-10M"
## [1] "COV434_DES_10-6M"
## [1] "KGN_DES_10-10M"
## [1] "KGN_DES_10-6M"
## [1] "Primary_DES_10-10M"
## [1] "Primary_DES_10-6M"
## [1] "COV434_KTZ_10-5M"
## [1] "COV434_KTZ_10-9M"
## [1] "KGN_KTZ_10-5M"
## [1] "KGN_KTZ_10-9M"
## [1] "Primary_KTZ_10-5M"
## [1] "Primary_KTZ_10-9M"
#significant DEG
comparison05 <- list()
for (i in 1:length(comparisonALL)) {
comparison05[[names(comparisonALL)[i]]] <- comparisonALL[[i]] %>% filter(padj<0.05)
}
comparison01 <- list()
for (i in 1:length(comparisonALL)) {
comparison01[[names(comparisonALL)[i]]] <- comparisonALL[[i]] %>% filter(padj<0.1)
}
save(dataset, comparison01, comparisonALL,
file = "/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/DESeq2_X868_5_removed.Rdata")
#ensembl <- useMart(biomart = "ensembl",dataset = "hsapiens_gene_ensembl")
#output <- getBM(attributes=c("ensembl_gene_id","external_gene_name","entrezgene_id","description"), mart = ensembl)
output <- read.csv2(file = "/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/mart.csv", sep = ",")
load(file = "/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/DESeq2_X868_5_removed.Rdata")
get_id <- function(res.sig) {
res.sig$geneid <- rownames(res.sig)
final <- merge(res.sig, output, by.x = "geneid", by.y = "ensembl_gene_id", all.x = T, all.y = F)
return(final)
}
final_all <- lapply(comparisonALL, function(x) {
x <- get_id(x)
})
final <- lapply(comparison01, function(x) {
x <- get_id(x)
})
openxlsx::write.xlsx(final, "/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/tables/not_stratified_DESeq2_x868_removed_sig01_20220110.xlsx")
read_excel_allsheets <- function(filename, tibble = FALSE) {
sheets <- readxl::excel_sheets(filename)
x <- lapply(sheets, function(X) readxl::read_excel(filename, sheet = X))
if(!tibble) x <- lapply(x, as.data.frame)
names(x) <- sheets
x
}
kgn <- read_excel_allsheets("/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/tables/not_stratified_DESeq2_x868_removed_sig01_20220110.xlsx")
deg_df <- data.frame()
for (i in names(kgn)){
deg <- kgn[[i]] %>% mutate(group = rep(i, nrow(kgn[[i]])))
deg_df <- rbind(deg_df, deg)
}
set <- deg_df %>%
mutate(cellline = (ifelse(str_detect(.$group, "COV434"), "COV434",
ifelse(str_detect(.$group, "KGN"), "KGN", "Primary")))) %>%
mutate(chemical = (ifelse(str_detect(.$group, "DES"), "DES", "KTZ")))
des <- set %>% filter(chemical == "DES")
ktz <- set %>% filter(chemical == "KTZ")
cov_d <- des %>% filter(cellline == "COV434") %>% na.omit()
kgn_d <- des %>% filter(cellline == "KGN") %>% na.omit()
primary_d <- des %>% filter(cellline == "Primary") %>% na.omit()
cov_k <- ktz %>% filter(cellline == "COV434") %>% na.omit()
kgn_k <- ktz %>% filter(cellline == "KGN") %>% na.omit()
primary_k <- ktz %>% filter(cellline == "Primary") %>% na.omit()
VennDiagram::venn.diagram(
x = list(cov_d$external_gene_name, kgn_d$external_gene_name, primary_d$external_gene_name),
category.names = c("COV434" , "KGN", "Primary"),
filename = 'Venn for DES three cell lines overlap sig01.png',
output=TRUE,
# Circles
lwd = 2,
col=c("#A58AFF","#53B400", "#00B6EB"),
fill = c(alpha("#A58AFF",1),alpha("#53B400",1), alpha("#00B6EB",1)),
scaled = T,
cex = 1.5,
fontface = "bold",
fontfamily = "sans",
# Set names
cat.cex = 0.8,
cat.fontface = "bold",
cat.default.pos = "outer",
cat.fontfamily = "sans",
cat.col = c("#A58AFF","#53B400", "#00B6EB")
)
## [1] 1
VennDiagram::venn.diagram(
x = list(cov_k$external_gene_name, kgn_k$external_gene_name, primary_k$external_gene_name),
category.names = c("COV434" , "KGN", "Primary"),
filename = 'Venn for KTZ three cell lines overlap sig01.png',
output=TRUE,
# Circles
lwd = 2,
col=c("#A58AFF","#53B400", "#00B6EB"),
fill = c(alpha("#A58AFF",1),alpha("#53B400",1), alpha("#00B6EB",1)),
scaled = T,
cex = 1.5,
fontface = "bold",
fontfamily = "sans",
# Set names
cat.cex = 0.8,
cat.fontface = "bold",
cat.default.pos = "outer",
cat.fontfamily = "sans",
cat.col = c("#A58AFF","#53B400", "#00B6EB")
)
## [1] 1
des_df <- des %>% arrange(desc(.$padj)) %>% dplyr::distinct(.$geneid, .keep_all = T)
ktz_df <- ktz %>% arrange(desc(.$padj)) %>% dplyr::distinct(.$geneid, .keep_all = T)
normalized_counts <- list()
for (i in chemical) {
normalized_counts[[i]] <- counts(dataset[[paste0("dds_",i)]], normalized=T) %>%
data.frame() %>%
tibble::rownames_to_column(var="gene") %>%
as_tibble() %>%
left_join(output, by=c("gene" = "ensembl_gene_id"))
}
id_k <- match(ktz_df$geneid, normalized_counts[["KTZ"]]$gene)
ktz_count <- normalized_counts[["KTZ"]][id_k,]
id_d <- match(des_df$geneid, normalized_counts[["DES"]]$gene)
des_count <- normalized_counts[["DES"]][id_d,]
genes_k <- ktz_count[,(ncol(ktz_count)-2)]
genes_d <- des_count[,(ncol(des_count)-2)]
scaleDES <- scale(t(des_count[,2:(ncol(des_count)-4)]), scale = T, center = T) %>% t() %>%
as.data.frame() %>% cbind(., genes_d) %>%
cbind(., des_df$group) %>%
na.omit()
scaleKTZ <- scale(t(ktz_count[,2:(ncol(ktz_count)-4)]), scale = T, center = T) %>% t() %>%
as.data.frame() %>% cbind(., genes_k) %>%
cbind(., ktz_df$group) %>%
na.omit()
order <- c("COV_P24_DMSO", "COV_P26_DMSO", "COV_P27_DMSO",
"COV_P24_DES_6", "COV_P26_DES_6", "COV_P27_DES_6",
"COV_P24_DES_10", "COV_P26_DES_10", "COV_P27_DES_10",
"KGN_P7_DMSO", "KGN_P8_DMSO", "KGN_P9_DMSO",
"KGN_P7_DES_6", "KGN_P8_DES_6", "KGN_P9_DES_6",
"KGN_P7_DES_10", "KGN_P8_DES_10", "KGN_P9_DES_10",
"X126_DMSO", "X236_DMSO", "X868_DMSO",
"X126_DES_6", "X236_DES_6", "X868_DES_6",
"X126_DES_10", "X236_DES_10", "X868_DES_10")
genomic_idx <- match(order, colnames(scaleDES))
genomic_idx
## [1] 1 4 7 2 5 8 3 6 9 10 13 16 11 14 17 12 15 18 19 22 25 20 23 26 21
## [26] 24 27
scaleDES <- scaleDES[ ,genomic_idx]
anno <- data.frame(cbind(order, c(rep("COV434",9), rep("KGN",9), rep("Primary",9)))) %>%
'colnames<-' (c("Sample", "cell_line")) %>%
'rownames<-' (.$Sample) %>%
dplyr::select(cell_line) %>%
mutate(group = (rep(c(rep("DMSO",3), rep("DES_6",3), rep("DES_10",3)),3)))
pheatmap(scaleDES, cluster_rows = T, show_rownames = F,
cluster_cols = F, clustering_distance_rows = "correlation", annotation = anno,
breaks = seq(-2, 2, length.out = 90))
order1 <- c("COV_P24_DMSO", "COV_P26_DMSO", "COV_P27_DMSO",
"COV_P24_KTZ_5", "COV_P26_KTZ_5", "COV_P27_KTZ_5",
"COV_P24_KTZ_9", "COV_P26_KTZ_9", "COV_P27_KTZ_9",
"KGN_P7_DMSO", "KGN_P8_DMSO", "KGN_P9_DMSO",
"KGN_P7_KTZ_5", "KGN_P8_KTZ_5", "KGN_P9_KTZ_5",
"KGN_P7_KTZ_9", "KGN_P8_KTZ_9", "KGN_P9_KTZ_9",
"X126_DMSO", "X236_DMSO", "X868_DMSO",
"X126_KTZ_5", "X236_KTZ_5",
"X126_KTZ_9", "X236_KTZ_9", "X868_KTZ_9")
genomic_idx <- match(order1, colnames(scaleKTZ))
genomic_idx
## [1] 1 4 7 2 5 8 3 6 9 10 13 16 11 14 17 12 15 18 19 22 25 20 23 21 24
## [26] 26
scaleKTZ <- scaleKTZ[ ,genomic_idx]
anno1 <- data.frame(cbind(order1, c(rep("COV434",9), rep("KGN",9), rep("Primary",8)))) %>%
'colnames<-' (c("Sample", "cell_line")) %>%
'rownames<-' (.$Sample) %>%
dplyr::select(cell_line) %>%
mutate(group = c(rep(c(rep("DMSO",3), rep("KTZ_5",3), rep("KTZ_9",3)), 2),
c(rep("DMSO",3), rep("KTZ_5",2), rep("KTZ_9",3))))
pheatmap(scaleKTZ[,1:26], cluster_rows = T, show_rownames = F,
cluster_cols = F, clustering_distance_rows = "correlation", annotation = anno1,
breaks = seq(-1, 2, length.out = 90))
pval_threshold <- 0.1
logfc_threshold <- 1
volcano_vst <- list()
for (i in 1:length(comparisonALL)) {
deseq.results <- comparisonALL[[i]]
deseq.results <- deseq.results %>% filter(!is.na(padj))
deseq.results$threshold <- as.factor(abs(deseq.results$log2FoldChange) >= logfc_threshold &
deseq.results$padj < pval_threshold)
deseq.results <- merge(deseq.results, output[,2:3], by.x = "geneid", by.y = "ensembl_gene_id",
all.x = T, all.y = F)
volcano_vst[[names(comparisonALL)[i]]] <- ggplot(data=deseq.results,
aes(x=log2FoldChange, y=-log10(padj), colour=threshold)) +
geom_point(alpha=0.4, size=1.75) +
theme(legend.position = "none") +
theme_bw() + theme(legend.position="none") +
geom_vline(xintercept = logfc_threshold) +
geom_vline(xintercept = -logfc_threshold) +
geom_hline(yintercept = -log10(pval_threshold)) +
xlab("log2 fold change") + ylab("-log10 FDR") +
ggtitle(paste0(names(comparisonALL)[i], " DEGs")) +
ggrepel::geom_text_repel(aes(label=ifelse(padj < pval_threshold & abs(log2FoldChange) >= logfc_threshold,
external_gene_name, ''), max.overlaps=30))
}
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
## Warning: Ignoring unknown aesthetics: max.overlaps
wrap_plots(ncol = 3, plotlist = volcano_vst) + plot_annotation(tag_levels = "A")
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 226 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 154 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 30 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 31 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 58 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 71 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 24 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# coef input should be like: "group_DES_10.6M_vs_DMSO"
shrunk <- list()
for (i in chemical) {
for (j in cell_line) {
dataset[[paste0("dds_", i)]]$groups <- relevel(dataset[[paste0("dds_", i)]]$groups, ref=paste0(j,"_DMSO"))
dataset[[paste0("dds_", i)]] <- nbinomWaldTest(dataset[[paste0("dds_", i)]], maxit=1000)
groups <- levels(dataset[[paste0("dds_", i)]]$groups)
groups <- groups[groups %in% grep(j, groups, value=T)]
groups <- groups[!groups %in% grep("DMSO", groups, value=T)] %>%
str_replace_all("-",".")
for (k in groups){
print(k)
shrunk[[k]] <- lfcShrink(dataset[[paste0("dds_", i)]],
coef = match(paste0("groups_",k,"_vs_",j,"_DMSO"),
resultsNames(dataset[[paste0("dds_", i)]])), type = "apeglm") %>%
as.data.frame() %>% mutate(geneid = rownames(.))
}
}
}
## found results columns, replacing these
## [1] "COV434_DES_10.10M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "COV434_DES_10.6M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## found results columns, replacing these
## [1] "KGN_DES_10.10M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "KGN_DES_10.6M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## found results columns, replacing these
## [1] "Primary_DES_10.10M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "Primary_DES_10.6M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## found results columns, replacing these
## [1] "COV434_KTZ_10.5M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "COV434_KTZ_10.9M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## found results columns, replacing these
## [1] "KGN_KTZ_10.5M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "KGN_KTZ_10.9M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## found results columns, replacing these
## [1] "Primary_KTZ_10.5M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "Primary_KTZ_10.9M"
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
shrunk_sig <- list()
for (i in 1:length(shrunk)) {
shrunk_sig[[names(shrunk)[i]]] <- shrunk[[i]] %>% filter(padj<0.1)
}
final_shrunk <- lapply(shrunk_sig, function(x) {
x <- get_id(x)
})
all_shrunk <- lapply(shrunk, function(x) {
x <- get_id(x)
})
-log10(padj) then get the sign of the log2FC. If upregulated, positive. If downregulated, negative. Then rank according to the logpadj. The nonsignificant ones will end up in the middle (which is not important in the enrichement) Almost similar to ranking with log2FC
GeneList <- list()
for(i in 1:length(all_shrunk)){
genelist <- all_shrunk[[i]] %>%
dplyr::select(entrezgene_id, log2FoldChange, padj) %>%
mutate(logpadj = ifelse(log2FoldChange < 0, log10(padj), -log10(padj))) %>%
filter(!is.na(entrezgene_id))
GeneList[[names(all_shrunk)[i]]] <- genelist$logpadj
names(GeneList[[names(all_shrunk)[i]]]) <- as.character(genelist$entrezgene_id)
GeneList[[names(all_shrunk)[i]]] <- sort(GeneList[[names(all_shrunk)[i]]], decreasing = T)
}
msigHs_c2 <- msigdbr(species = "Homo sapiens", category = "C2") %>%
dplyr::select(gs_name, entrez_gene) %>%
as.data.frame()
msigHs_h <- msigdbr(species = "Homo sapiens", category = "H") %>%
dplyr::select(gs_name, entrez_gene) %>%
as.data.frame()
gse <- list()
gse_df <- list()
for (i in 1:length(all_shrunk)) {
print(paste0("Gene set analysis for"," ", names(all_shrunk)[i]," ", "vs DMSO"))
enrich <- enricher(gene = all_shrunk[[i]]$entrezgene_id,
TERM2GENE = msigHs_h,
pvalueCutoff = 0.1,
pAdjustMethod = "BH",
minGSSize = 10,
maxGSSize = 500)
gse[[names(all_shrunk)[i]]] <- enrich
gse_df[[names(all_shrunk)[i]]] <- enrich %>% as.data.frame()
}
## [1] "Gene set analysis for COV434_DES_10.10M vs DMSO"
## [1] "Gene set analysis for COV434_DES_10.6M vs DMSO"
## [1] "Gene set analysis for KGN_DES_10.10M vs DMSO"
## [1] "Gene set analysis for KGN_DES_10.6M vs DMSO"
## [1] "Gene set analysis for Primary_DES_10.10M vs DMSO"
## [1] "Gene set analysis for Primary_DES_10.6M vs DMSO"
## [1] "Gene set analysis for COV434_KTZ_10.5M vs DMSO"
## [1] "Gene set analysis for COV434_KTZ_10.9M vs DMSO"
## [1] "Gene set analysis for KGN_KTZ_10.5M vs DMSO"
## [1] "Gene set analysis for KGN_KTZ_10.9M vs DMSO"
## [1] "Gene set analysis for Primary_KTZ_10.5M vs DMSO"
## [1] "Gene set analysis for Primary_KTZ_10.9M vs DMSO"
openxlsx::write.xlsx(gse, "/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/tables/DESeq2_x868_removed_enricher_hallmark_20220110.xlsx")
gsea_hallmark <- list()
gsea_hallmark_df <- list()
for (i in 1:length(GeneList)){
print(names(GeneList)[i])
gsea <- GSEA(gene = GeneList[[i]],
TERM2GENE = msigHs_h,
pvalueCutoff = 1,
pAdjustMethod = "BH",
minGSSize = 10,
maxGSSize = 500)
gsea_hallmark[[names(GeneList)[i]]] <- gsea
gsea_hallmark_df[[names(GeneList)[i]]] <- gsea %>% as.data.frame()
}
## [1] "COV434_DES_10.10M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.72% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "COV434_DES_10.6M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.88% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "KGN_DES_10.10M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (50.17% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "KGN_DES_10.6M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (57.2% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "Primary_DES_10.10M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (95.89% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "Primary_DES_10.6M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (99.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## leading edge analysis...
## done...
## [1] "COV434_KTZ_10.5M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (94.94% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "COV434_KTZ_10.9M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (93.67% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "KGN_KTZ_10.5M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (78.97% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "KGN_KTZ_10.9M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (68.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "Primary_KTZ_10.5M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (95.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "Primary_KTZ_10.9M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.29% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
gsea_c2 <- list()
gsea_c2_df <- list()
for (i in 1:length(GeneList)){
print(names(GeneList)[i])
gsea <- GSEA(gene = GeneList[[i]],
TERM2GENE = msigHs_c2,
pvalueCutoff = 1,
pAdjustMethod = "BH",
minGSSize = 10,
maxGSSize = 500)
gsea_c2[[names(GeneList)[i]]] <- gsea
gsea_c2_df[[names(GeneList)[i]]] <- gsea %>% as.data.frame()
}
## [1] "COV434_DES_10.10M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.72% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "COV434_DES_10.6M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.88% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "KGN_DES_10.10M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (50.17% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "KGN_DES_10.6M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (57.2% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "Primary_DES_10.10M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (95.89% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "Primary_DES_10.6M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (99.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are duplicate gene names, fgsea may produce unexpected results.
## leading edge analysis...
## done...
## [1] "COV434_KTZ_10.5M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (94.94% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "COV434_KTZ_10.9M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (93.67% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "KGN_KTZ_10.5M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (78.97% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "KGN_KTZ_10.9M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (68.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "Primary_KTZ_10.5M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (95.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## [1] "Primary_KTZ_10.9M"
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.29% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
for (i in 1:length(gse)) {
gse[[i]] <- setReadable(gse[[i]], OrgDb = org.Hs.eg.db, keyType="ENTREZID")
}
openxlsx::write.xlsx(gse, "/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/tables/DESeq2_x868_removed_enricher_hallmark_20220110.xlsx")
for (i in 1:length(gsea_hallmark)) {
gsea_hallmark[i] <- setReadable(gsea_hallmark[[i]], OrgDb = org.Hs.eg.db, keyType="ENTREZID")
}
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(gsea_hallmark[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
gsea_hallmark_sig <- list()
for (i in 1:length(gsea_hallmark)) {
gsea_hallmark_sig[[names(gsea_hallmark)[i]]] <- gsea_hallmark_df[[i]] %>% filter(p.adjust<0.1)
}
gsea_c2_sig <- list()
for (i in 1:length(gsea_c2)) {
gsea_c2_sig[[names(gsea_c2)[i]]] <- gsea_c2_df[[i]] %>% filter(p.adjust<0.1)
}
openxlsx::write.xlsx(gsea_hallmark, "/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/tables/DESeq2_x868_removed_gsea_hallmark_20220110.xlsx")
dotplot_gsea_hallmark <- list()
for (i in 1:length(gsea_hallmark)) {
dotplot_gsea_hallmark[[names(gsea_hallmark)[[i]]]] <- dotplot(gsea_hallmark[[i]], showCategory=10, orderBy="GeneRatio") + labs(title=names(gsea_hallmark)[[i]])
}
wrap_plots(ncol = 3, dotplot_gsea_hallmark) + plot_annotation(tag_levels = "A")
dotplot_enricher_gse <- list()
for (i in 1:length(gse)) {
dotplot_enricher_gse[[names(gse)[[i]]]] <- dotplot(gse[[i]], showCategory=10, orderBy="GeneRatio") + labs(title=names(gse)[[i]])
}
wrap_plots(ncol = 3, dotplot_enricher_gse) + plot_annotation(tag_levels = "A")
emapplot_gsea_hallmark <- list()
for (i in 1:length(gsea_hallmark)) {
test <- enrichplot::pairwise_termsim(gsea_hallmark[[i]])
emapplot_gsea_hallmark[[names(gsea_hallmark)[[i]]]] <- emapplot(test, showCategory = 10)
}
wrap_plots(ncol = 3, emapplot_gsea_hallmark) + plot_annotation(tag_levels = "A")
emapplot_enricher_gse <- list()
for (i in 1:length(gse)) {
test <- enrichplot::pairwise_termsim(gse[[i]])
emapplot_enricher_gse[[names(gse)[[i]]]] <- emapplot(test, showCategory = 10)
}
wrap_plots(ncol = 3, emapplot_enricher_gse) + plot_annotation(tag_levels = "A")
enrich_KEGG <- list()
enrich_KEGG_df <- list()
for (i in 1:length(all_shrunk)){
print(names(all_shrunk)[i])
enrich <- enrichKEGG(gene = all_shrunk[[i]]$entrezgene_id,
organism = 'hsa',
pvalueCutoff = 1)
enrich_KEGG[[names(all_shrunk)[i]]] <- enrich
enrich_KEGG_df[[names(all_shrunk)[i]]] <- enrich %>% as.data.frame()
}
## [1] "COV434_DES_10.10M"
## Reading KEGG annotation online:
##
## Reading KEGG annotation online:
## [1] "COV434_DES_10.6M"
## [1] "KGN_DES_10.10M"
## [1] "KGN_DES_10.6M"
## [1] "Primary_DES_10.10M"
## [1] "Primary_DES_10.6M"
## [1] "COV434_KTZ_10.5M"
## [1] "COV434_KTZ_10.9M"
## [1] "KGN_KTZ_10.5M"
## [1] "KGN_KTZ_10.9M"
## [1] "Primary_KTZ_10.5M"
## [1] "Primary_KTZ_10.9M"
for (i in 1:length(enrich_KEGG)) {
enrich_KEGG[i] <- setReadable(enrich_KEGG[[i]], OrgDb = org.Hs.eg.db, keyType="ENTREZID")
}
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
## Warning in `[<-`(`*tmp*`, i, value = setReadable(enrich_KEGG[[i]], OrgDb =
## org.Hs.eg.db, : implicit list embedding of S4 objects is deprecated
enrich_KEGG_sig <- list()
for (i in 1:length(enrich_KEGG)) {
enrich_KEGG_sig[[names(enrich_KEGG)[i]]] <- enrich_KEGG_df[[i]] %>% filter(p.adjust < 0.05)
}
########## Plot selected KEGG across groups ###########
#selected <- c("hsa04150", "hsa04114", "hsa01212", "hsa04931", "hsa04010", "hsa04310",
# "hsa00100", "hsa04012", "hsa04914")
selected <- c("hsa04350", "hsa04928", "hsa04150", "hsa04310", "hsa04022", "hsa04024",
"hsa04914", "hsa04114", "hsa04914", "hsa04919", "hsa00062", "hsa00071",
"hsa01212", "hsa01040", "hsa01212", "hsa04024", "hsa04910", "hsa04020",
"hsa04010", "hsa04151")
gse_KEGG <- list()
gse_KEGG_df <- list()
for(i in 1:length(GeneList)){
print(names(GeneList)[i])
edo <- gseKEGG(geneList = GeneList[[i]],
organism = 'hsa',
nPerm = 1000,
minGSSize = 10,
pvalueCutoff = 1,
verbose = FALSE)
gse_KEGG[[names(GeneList)[i]]] <- edo
gse_KEGG_df[[names(GeneList)[i]]] <- edo %>% as.data.frame()
}
## [1] "COV434_DES_10.10M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.72% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "COV434_DES_10.6M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.88% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "KGN_DES_10.10M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (50.17% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "KGN_DES_10.6M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (57.2% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "Primary_DES_10.10M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (95.89% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "Primary_DES_10.6M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (99.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## [1] "COV434_KTZ_10.5M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (94.94% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "COV434_KTZ_10.9M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (93.67% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "KGN_KTZ_10.5M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (78.97% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "KGN_KTZ_10.9M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (68.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "Primary_KTZ_10.5M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (95.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## [1] "Primary_KTZ_10.9M"
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (97.29% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
gse_KEGG_sig <- list()
for (i in 1:length(gse_KEGG)) {
gse_KEGG_sig[[names(gse_KEGG)[i]]] <- gse_KEGG_df[[i]] %>% filter(p.adjust<0.05)
gse_KEGG_sig[["summary"]] <- rbind(gse_KEGG_sig[["summary"]], gse_KEGG_sig[[i]] %>%
mutate(chemical=names(gse_KEGG_sig)[i]))
}
ego <- list()
for (i in 1:length(all_shrunk)) {
print(paste0("GO analysis for"," ", names(all_shrunk)[i]))
ego[[names(final_shrunk)[i]]] <- enrichGO(gene = final_shrunk[[i]]$entrezgene_id,
#keyType = "ENSEMBL",
#universe = msigHs_h$entrez_gene,
universe = all_shrunk[[i]]$entrezgene_id,
OrgDb = org.Hs.eg.db, ont = "ALL",
pAdjustMethod = "BH",
qvalueCutoff = 0.05, readable = TRUE)
}
## [1] "GO analysis for COV434_DES_10.10M"
## [1] "GO analysis for COV434_DES_10.6M"
## [1] "GO analysis for KGN_DES_10.10M"
## [1] "GO analysis for KGN_DES_10.6M"
## [1] "GO analysis for Primary_DES_10.10M"
## [1] "GO analysis for Primary_DES_10.6M"
## [1] "GO analysis for COV434_KTZ_10.5M"
## [1] "GO analysis for COV434_KTZ_10.9M"
## [1] "GO analysis for KGN_KTZ_10.5M"
## [1] "GO analysis for KGN_KTZ_10.9M"
## [1] "GO analysis for Primary_KTZ_10.5M"
## [1] "GO analysis for Primary_KTZ_10.9M"
ego_dotplot <- list()
for (i in 1: length(ego)) {
ego_dotplot[[names(ego)[i]]] <- dotplot(ego[[i]], showCategory = 50, orderBy="GeneRatio") +
ggtitle(paste0("Dotplot - ",names(ego)[i])) + scale_color_continuous(low = "purple", high = "green")
}
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
#wrap_plots(ncol = 2, ego_dotplot) + plot_annotation(tag_levels = "A")
dotplot_enrich_KEGG <- list()
for (i in 1:length(enrich_KEGG)) {
dotplot_enrich_KEGG[[names(enrich_KEGG)[[i]]]] <- dotplot(enrich_KEGG[[i]], showCategory=10, orderBy="GeneRatio") + labs(title=names(enrich_KEGG)[[i]])
}
wrap_plots(ncol = 2, dotplot_enrich_KEGG) + plot_annotation(tag_levels = "A")
dotplot_gse_KEGG <- list()
for (i in 1:length(gse_KEGG)) {
dotplot_gse_KEGG[[names(gse_KEGG)[[i]]]] <- dotplot(gse_KEGG[[i]], showCategory=10, orderBy="GeneRatio", split=".sign") + facet_grid(.~.sign) + labs(title=names(gse_KEGG)[[i]])
}
wrap_plots(ncol = 2, dotplot_gse_KEGG) + plot_annotation(tag_levels = "A")
hsa04914 <- pathview(gene.data = GeneList[[7]],
pathway.id = "hsa04914",
species = "hsa",
limit = list(gene=max(abs(GeneList[[7]])), cpd=1))
## Info: Downloading xml files for hsa04914, 1/1 pathways..
## Info: Downloading png files for hsa04914, 1/1 pathways..
## 'select()' returned 1:1 mapping between keys and columns
## Info: Working in directory /Users/tili/OneDrive - Karolinska Institutet/Mac/Desktop/Cell_line_bulk_seq_202011/code
## Info: Writing image file hsa04914.pathview.png
browseKEGG(enrich_KEGG_sig[[7]], 'hsa04914')
des_matrix <- normalized_counts[["DES"]][,!(colnames(normalized_counts[["DES"]]) %in% c("gene", "X", "description", "entrezgene_id"))] %>%
as.data.frame() %>%
na.omit()
des_matrix <- des_matrix[-which(duplicated(des_matrix$external_gene_name)),] %>%
'rownames<-' (.$external_gene_name) %>%
.[,-ncol(.)]
ktz_matrix <- normalized_counts[["KTZ"]][,!(colnames(normalized_counts[["KTZ"]]) %in% c("gene", "X", "description", "entrezgene_id"))] %>%
as.data.frame() %>%
na.omit()
ktz_matrix <- ktz_matrix[-which(duplicated(ktz_matrix$external_gene_name)),] %>%
'rownames<-' (.$external_gene_name) %>%
.[,-ncol(.)]
geneset <- getGmt("/Users/tili//Desktop/course/Bioinformatics analysis/Transciptomics/code-down/data/MsigDB/v7.4/h.all.v7.4.symbols.gmt")
DES_gsva <- gsva(as.matrix(des_matrix), geneset, mx.diff = F)
## Estimating GSVA scores for 50 gene sets.
## Estimating ECDFs with Gaussian kernels
##
|
| | 0%
|
|= | 2%
|
|=== | 4%
|
|==== | 6%
|
|====== | 8%
|
|======= | 10%
|
|======== | 12%
|
|========== | 14%
|
|=========== | 16%
|
|============= | 18%
|
|============== | 20%
|
|=============== | 22%
|
|================= | 24%
|
|================== | 26%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 32%
|
|======================== | 34%
|
|========================= | 36%
|
|=========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|=============================== | 44%
|
|================================ | 46%
|
|================================== | 48%
|
|=================================== | 50%
|
|==================================== | 52%
|
|====================================== | 54%
|
|======================================= | 56%
|
|========================================= | 58%
|
|========================================== | 60%
|
|=========================================== | 62%
|
|============================================= | 64%
|
|============================================== | 66%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|======================================================= | 78%
|
|======================================================== | 80%
|
|========================================================= | 82%
|
|=========================================================== | 84%
|
|============================================================ | 86%
|
|============================================================== | 88%
|
|=============================================================== | 90%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|===================================================================== | 98%
|
|======================================================================| 100%
KTZ_gsva <- gsva(as.matrix(ktz_matrix), geneset, mx.diff = F)
## Estimating GSVA scores for 50 gene sets.
## Estimating ECDFs with Gaussian kernels
##
|
| | 0%
|
|= | 2%
|
|=== | 4%
|
|==== | 6%
|
|====== | 8%
|
|======= | 10%
|
|======== | 12%
|
|========== | 14%
|
|=========== | 16%
|
|============= | 18%
|
|============== | 20%
|
|=============== | 22%
|
|================= | 24%
|
|================== | 26%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 32%
|
|======================== | 34%
|
|========================= | 36%
|
|=========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|=============================== | 44%
|
|================================ | 46%
|
|================================== | 48%
|
|=================================== | 50%
|
|==================================== | 52%
|
|====================================== | 54%
|
|======================================= | 56%
|
|========================================= | 58%
|
|========================================== | 60%
|
|=========================================== | 62%
|
|============================================= | 64%
|
|============================================== | 66%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|======================================================= | 78%
|
|======================================================== | 80%
|
|========================================================= | 82%
|
|=========================================================== | 84%
|
|============================================================ | 86%
|
|============================================================== | 88%
|
|=============================================================== | 90%
|
|================================================================ | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|===================================================================== | 98%
|
|======================================================================| 100%
gsva_che <- list(DES_gsva, KTZ_gsva)
names(gsva_che) <- c("DES", "KTZ")
info <- read.csv(file = "/Users/tili/Desktop/Cell_line_bulk_seq_202011/data/info.csv", header = T, sep = ";")
info <- info[-c(46,48:49,31:32),]
meta <- list()
for (i in chemical) {
meta[[paste0("info_", i)]] <- info %>% filter((chemical == i) | (chemical == "DMSO"))
meta[[paste0("group_", i)]] <- factor(meta[[paste0("info_", i)]]$groups)
meta[[paste0("group_", i)]] <- str_replace_all(meta[[paste0("group_", i)]], "-", "_") %>%
as.factor() %>%
relevel(ref = "COV434_DMSO")
}
design <- list()
for (i in chemical) {
design[[i]] <- model.matrix(~meta[[paste0("group_", i)]])
colnames(design[[i]]) <- levels(meta[[paste0("group_", i)]])
}
contrast_DES <- makeContrasts(COV434_DES_10_10M-COV434_DMSO, COV434_DES_10_6M-COV434_DMSO,
KGN_DES_10_10M-KGN_DMSO, KGN_DES_10_6M-KGN_DMSO,
Primary_DES_10_10M-Primary_DMSO, Primary_DES_10_6M-Primary_DMSO,
levels = design[["DES"]])
contrast_KTZ <- makeContrasts(COV434_KTZ_10_5M-COV434_DMSO, COV434_KTZ_10_9M-COV434_DMSO,
KGN_KTZ_10_5M-KGN_DMSO, KGN_KTZ_10_9M-KGN_DMSO,
Primary_KTZ_10_5M-Primary_DMSO, Primary_KTZ_10_9M-Primary_DMSO,
levels = design[["KTZ"]])
contrast_list <- list(contrast_DES, contrast_KTZ)
names(contrast_list) <- c("DES", "KTZ")
fit_list <- list()
for (i in chemical) {
fit <- lmFit(gsva_che[[i]], design[[i]])
fit_list[[paste0("fit2_",i)]] <- contrasts.fit(fit, contrast_list[[i]])
fit_list[[paste0("fit2_",i)]] <- eBayes(fit_list[[paste0("fit2_",i)]])
}
result_list <- list()
for (i in chemical) {
result_list[[paste0("res_",i)]] <- decideTests(fit_list[[paste0("fit2_",i)]])
print(paste0("Results for ", i))
print(summary(result_list[[paste0("res_",i)]]))
}
## [1] "Results for DES"
## COV434_DES_10_10M - COV434_DMSO COV434_DES_10_6M - COV434_DMSO
## Down 0 0
## NotSig 50 49
## Up 0 1
## KGN_DES_10_10M - KGN_DMSO KGN_DES_10_6M - KGN_DMSO
## Down 0 1
## NotSig 50 49
## Up 0 0
## Primary_DES_10_10M - Primary_DMSO Primary_DES_10_6M - Primary_DMSO
## Down 0 0
## NotSig 50 50
## Up 0 0
## [1] "Results for KTZ"
## COV434_KTZ_10_5M - COV434_DMSO COV434_KTZ_10_9M - COV434_DMSO
## Down 2 1
## NotSig 44 46
## Up 4 3
## KGN_KTZ_10_5M - KGN_DMSO KGN_KTZ_10_9M - KGN_DMSO
## Down 0 0
## NotSig 49 50
## Up 1 0
## Primary_KTZ_10_5M - Primary_DMSO Primary_KTZ_10_9M - Primary_DMSO
## Down 0 0
## NotSig 50 50
## Up 0 0
comparison <- list()
for (i in chemical) {
n <- colnames(contrast_list[[i]])
for (j in n) {
comparison[[str_sub(j,1,17)]] <- topTable(fit_list[[paste0("fit2_",i)]], coef=j, n = Inf,
adjust.method = "BH",sort.by = "p")
comparison[[str_sub(j,1,17)]]$id <- rownames(comparison[[str_sub(j,1,17)]])
}
}
openxlsx::write.xlsx(comparison,
"/Users/tili/Desktop/Cell_line_bulk_seq_202011/results/tables/limma_gsva_hallmark_7.4_20220110.xlsx")
sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/tili/miniconda3/envs/upx/lib/libopenblasp-r0.3.18.dylib
##
## locale:
## [1] C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] GSVA_1.38.2 GSEABase_1.52.1
## [3] graph_1.68.0 annotate_1.68.0
## [5] XML_3.99-0.8 patchwork_1.1.1
## [7] future_1.23.0 stringr_1.4.0
## [9] edgeR_3.32.1 limma_3.46.0
## [11] msigdbr_7.4.1 RColorBrewer_1.1-2
## [13] VennDiagram_1.7.1 futile.logger_1.4.3
## [15] clusterProfiler_3.18.1 pathview_1.30.1
## [17] DOSE_3.16.0 org.Hs.eg.db_3.12.0
## [19] biomaRt_2.46.3 umap_0.2.7.0
## [21] pheatmap_1.0.12 ggplot2_3.3.5
## [23] DESeq2_1.30.1 SummarizedExperiment_1.20.0
## [25] MatrixGenerics_1.2.1 matrixStats_0.61.0
## [27] EnsDb.Hsapiens.v86_2.99.0 ensembldb_2.14.0
## [29] AnnotationFilter_1.14.0 GenomicFeatures_1.42.2
## [31] AnnotationDbi_1.52.0 Biobase_2.50.0
## [33] GenomicRanges_1.42.0 GenomeInfoDb_1.26.4
## [35] IRanges_2.24.1 readr_2.1.1
## [37] S4Vectors_0.28.1 BiocGenerics_0.36.0
## [39] tximport_1.18.0 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.22 tidyselect_1.1.1
## [4] RSQLite_2.2.8 BiocParallel_1.24.1 scatterpie_0.1.6
## [7] munsell_0.5.0 codetools_0.2-18 withr_2.4.3
## [10] colorspace_2.0-2 GOSemSim_2.16.1 highr_0.9
## [13] knitr_1.37 listenv_0.8.0 labeling_0.4.2
## [16] KEGGgraph_1.50.0 bbmle_1.0.24 GenomeInfoDbData_1.2.4
## [19] polyclip_1.10-0 bit64_4.0.5 farver_2.1.0
## [22] downloader_0.4 coda_0.19-4 parallelly_1.30.0
## [25] vctrs_0.3.8 generics_0.1.1 lambda.r_1.2.4
## [28] xfun_0.29 BiocFileCache_1.14.0 R6_2.5.1
## [31] apeglm_1.12.0 graphlayouts_0.8.0 locfit_1.5-9.4
## [34] bitops_1.0-7 cachem_1.0.6 fgsea_1.16.0
## [37] DelayedArray_0.16.3 assertthat_0.2.1 scales_1.1.1
## [40] ggraph_2.0.5 enrichplot_1.10.2 gtable_0.3.0
## [43] globals_0.14.0 tidygraph_1.2.0 rlang_0.4.12
## [46] genefilter_1.72.1 splines_4.0.5 rtracklayer_1.50.0
## [49] lazyeval_0.2.2 BiocManager_1.30.16 yaml_2.2.1
## [52] reshape2_1.4.4 qvalue_2.22.0 tools_4.0.5
## [55] ellipsis_0.3.2 jquerylib_0.1.4 Rcpp_1.0.7
## [58] plyr_1.8.6 progress_1.2.2 zlibbioc_1.36.0
## [61] purrr_0.3.4 RCurl_1.98-1.5 prettyunits_1.1.1
## [64] openssl_1.4.6 viridis_0.6.2 cowplot_1.1.1
## [67] ggrepel_0.9.1 magrittr_2.0.1 data.table_1.14.2
## [70] RSpectra_0.16-0 futile.options_1.0.1 DO.db_2.9
## [73] openxlsx_4.2.5 mvtnorm_1.1-3 ProtGenerics_1.22.0
## [76] hms_1.1.1 evaluate_0.14 xtable_1.8-4
## [79] emdbook_1.3.12 readxl_1.3.1 gridExtra_2.3
## [82] bdsmatrix_1.3-4 compiler_4.0.5 tibble_3.1.6
## [85] crayon_1.4.2 shadowtext_0.1.0 htmltools_0.5.2
## [88] tzdb_0.2.0 tidyr_1.1.4 geneplotter_1.68.0
## [91] DBI_1.1.2 tweenr_1.0.2 formatR_1.11
## [94] dbplyr_2.1.1 MASS_7.3-54 rappdirs_0.3.3
## [97] babelgene_21.4 Matrix_1.4-0 igraph_1.2.10
## [100] pkgconfig_2.0.3 rvcheck_0.1.8 GenomicAlignments_1.26.0
## [103] numDeriv_2016.8-1.1 xml2_1.3.3 bslib_0.3.1
## [106] XVector_0.30.0 digest_0.6.29 Biostrings_2.58.0
## [109] cellranger_1.1.0 rmarkdown_2.11 fastmatch_1.1-3
## [112] curl_4.3.2 Rsamtools_2.6.0 lifecycle_1.0.1
## [115] jsonlite_1.7.2 viridisLite_0.4.0 askpass_1.1
## [118] fansi_0.5.0 pillar_1.6.4 lattice_0.20-45
## [121] KEGGREST_1.30.1 fastmap_1.1.0 httr_1.4.2
## [124] survival_3.2-13 GO.db_3.12.1 glue_1.6.0
## [127] zip_2.2.0 png_0.1-7 bit_4.0.4
## [130] Rgraphviz_2.34.0 ggforce_0.3.3 stringi_1.7.6
## [133] sass_0.4.0 blob_1.2.2 memoise_2.0.1